package com.demo.config.apache.NLP;

import com.huaban.analysis.jieba.JiebaSegmenter;
import com.huaban.analysis.jieba.WordDictionary;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.stream.Stream;

/**
 * Small demo around the jieba segmenter: {@link #txt()} rewrites a name corpus
 * into "word length nr" lines, and {@link #cidian()} loads that file as a user
 * dictionary and prints the segmentation of a sample sentence.
 */
public class NameLocationRecognition {

	/** Corpus file that {@link #txt()} rewrites in place and {@link #cidian()} loads as a user dictionary. */
	private static final String DATA_PATH = "D:\\file\\txt\\Chinese_Names_Corpus（120W）.txt";

	/**
	 * Entry point; runs the segmentation demo in {@link #cidian()}.
	 *
	 * @param args unused
	 * @throws IOException kept in the signature for callers; the current body does not throw it
	 */
	public static void main(String[] args) throws IOException {
		cidian();
	}

	/**
	 * Rewrites the corpus file in place, turning every non-blank line {@code X}
	 * into {@code "X <X.length()> nr"}.
	 *
	 * <p>WARNING: run this only once per file. A second run appends another
	 * length/tag pair to lines that were already annotated.
	 *
	 * <p>The file is read and written as UTF-8 instead of the platform default
	 * charset, so the result does not depend on the machine's locale.
	 * NOTE(review): jieba's {@code loadUserDict(Path)} is understood to read the
	 * dictionary as UTF-8 as well; confirm for the library version in use.
	 *
	 * @throws IOException if the file cannot be read or written, including when
	 *                     its content is not valid UTF-8
	 */
	public static void txt() throws IOException {
		Path path = FileSystems.getDefault().getPath(DATA_PATH);
		StringBuilder content = new StringBuilder();

		// The whole file is read (and the reader closed) before the writer truncates it,
		// so a read failure leaves the original file untouched.
		try (BufferedReader reader = Files.newBufferedReader(path, StandardCharsets.UTF_8)) {
			String line;
			while ((line = reader.readLine()) != null) {
				if (line.trim().isEmpty()) {
					// Keep blank lines as they are rather than emitting a bogus " 0 nr" entry.
					content.append(line).append("\n");
					continue;
				}
				content.append(line).append(" ").append(line.length()).append(" nr").append("\n");
			}
		}

		// Default open options create the file if needed and truncate existing content.
		try (BufferedWriter writer = Files.newBufferedWriter(path, StandardCharsets.UTF_8)) {
			writer.write(content.toString());
		}
	}

	/**
	 * Loads the corpus file as a custom jieba user dictionary, segments a fixed
	 * sample sentence and prints each resulting token on its own line.
	 */
	public static void cidian() {
		// Load the custom dictionary
		Path path = FileSystems.getDefault().getPath(DATA_PATH);
		WordDictionary.getInstance().loadUserDict(path);

		JiebaSegmenter jiebaSegmenter = new JiebaSegmenter();
		List<String> strings = jiebaSegmenter.sentenceProcess("周杰伦周冬雨,《开端》《镜双城》《淘金》三部热播剧均有她，你发现了吗？");
		strings.forEach(System.out::println);
	}

}